Compute recording indices in database

This notebook computes indices for sounds (or soundscape recordings) in a pumilio database.

Required packages

Variable declarations

working_directory – temporary directory for processing recordings



In [7]:

    
# This configuration cell sits above the notebook's import cells, so it brings
# in `os` itself; otherwise a fresh "Restart & Run All" fails with NameError.
import os

# Temporary directory used while processing recordings. The conversion step
# deletes and recreates it, so it must not point at a directory holding
# anything that should be kept.
working_directory = os.path.join(os.path.expanduser("~"), "pymilio_temp/")

Import statements



In [1]:

    
%load_ext rpy2.ipython



In [2]:

    
import rpy2.robjects as ro



In [3]:

    
%%R

library('tuneR')
library('seewave')
library('soundecology')









    



/Users/Jake/code_sandbox/venv/lib/python3.5/site-packages/rpy2/robjects/functions.py:106: UserWarning: tuneR >= 1.0 has changed its Wave class definition.
Use updateWave(object) to convert Wave objects saved with previous versions of tuneR.

  res = super(Function, self).__call__(*new_args, **new_kwargs)



In [4]:

    
import pandas
import pandas.rpy.common as com

# new version not working...
#from rpy2.robjects import pandas2ri
#pandas2ri.activate()
#from rpy2.robjects import r









    



/Users/Jake/code_sandbox/venv/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: The pandas.rpy module is deprecated and will be removed in a future version. We refer to external packages like rpy2. 
See here for a guide on how to port your code to rpy2: http://pandas.pydata.org/pandas-docs/stable/r_interface.html
  from ipykernel import kernelapp as app



In [5]:

    
import os.path
from shutil import rmtree
import subprocess



In [6]:

    
import pyprind



In [9]:

    
from Pymilio import database

Connect to database



In [10]:

    
# Shared database handle used by every later cell (insert/update helpers and
# the processing loop).
# NOTE(review): `read_default_file` presumably names a MySQL-style option file
# holding the credentials - confirm against the Pymilio package.
pumilio_db = database.Pymilio_db_connection(
    read_default_file='~/.my.cnf.pumilio',
    database='pumilio',
    user='pumilio',
)

get all sounds



In [11]:

    
# Fetch the sounds to process. The processing loop below iterates over
# `sounds` and uses `sounds[sound]` as the path of the file to convert.
# NOTE(review): `source_directory` is not assigned in any cell shown in this
# notebook (execution count In [8] is missing), so this cell raises NameError
# on a fresh kernel - define it in the variable declarations cell.
sounds = pumilio_db.get_sound_paths(prepath=source_directory)

Function definitions

database functions



In [12]:

    
def insert_row(table, columns, values):
    """Insert into ``table`` through the shared ``pumilio_db`` connection.

    ``columns`` and ``values`` are forwarded together as a single
    ``(columns, values)`` tuple in the ``values`` keyword of
    ``pumilio_db.insert``.
    """
    column_value_pair = (columns, values)
    pumilio_db.insert(table, values=column_value_pair)



In [13]:

    
def update_row(table, values, where):
    """Update ``table`` through the shared ``pumilio_db`` connection.

    All three arguments are forwarded unchanged, by keyword, to
    ``pumilio_db.update``. Callers in this notebook pass ``values`` as a
    ``(column, value)`` tuple and ``where`` as a SQL condition string.
    """
    update_arguments = dict(table=table, values=values, where=where)
    pumilio_db.update(**update_arguments)

index calculation functions



In [14]:

    
def calculateACI(sound):
    """Compute the acoustic complexity index (ACI) of a wav file via R.

    Parameters
    ----------
    sound : str
        Path of the wav file. It is only read into the R variable ``sound``
        when no object of that name already exists in the R session.

    Returns
    -------
    tuple
        ``(results, language, command)``: a dict with the four ACI totals,
        the language used for the computation (``'R'``) and the exact R
        expression that was evaluated.
    """
    language = 'R'
    command = "acoustic_complexity(sound, min_freq = NA, max_freq = NA, j = 5, fft_w = 512)"

    # read the recording into R unless a `sound` object is already there
    load_template = 'capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)'
    ro.r(load_template.format(sound))

    # evaluate the index in R; capture.output keeps R's console output away
    ro.r("capture.output(ACI <- {0}, file=NULL)".format(command))

    # pull the R result into Python
    aci_table = com.load_data('ACI')

    wanted = (
        'AciTotAll_left',
        'AciTotAll_right',
        'AciTotAll_left_bymin',
        'AciTotAll_right_bymin',
    )
    results = {key: aci_table[key][0] for key in wanted}
    return results, language, command



In [15]:

    
def calculateADI(sound):
    """Compute the acoustic diversity index (ADI) of a wav file via R.

    Parameters
    ----------
    sound : str
        Path of the wav file. It is only read into the R variable ``sound``
        when no object of that name already exists in the R session.

    Returns
    -------
    tuple
        ``(results, language, command)``: a dict with the left/right ADI
        values plus string renderings of the per-band values and band
        ranges, the language used (``'R'``) and the exact R expression that
        was evaluated.
    """
    language = 'R'
    command = "acoustic_diversity(sound, max_freq = 12000, db_threshold = -50, freq_step = 1000, shannon = TRUE)"

    # read the recording into R unless a `sound` object is already there
    load_template = 'capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)'
    ro.r(load_template.format(sound))

    # evaluate the index in R; capture.output keeps R's console output away
    ro.r("capture.output(ADI <- {0}, file=NULL)".format(command))

    # pull the R result into Python
    adi_table = com.load_data('ADI')

    def quoted_list(labels):
        # render the labels as "['a', 'b', ...]"
        return "['" + "', '".join(labels) + "']"

    results = {}
    for side in ('left', 'right'):
        results['adi_' + side] = adi_table['adi_' + side][0]
        results[side + '_band_values'] = str(adi_table[side + '_band_values'])
        results[side + '_bandrange_values'] = quoted_list(adi_table[side + '_bandrange_values'])
    return results, language, command



In [16]:

    
# acoustic evenness index
def calculateAEI(sound):
    """Compute the acoustic evenness index (AEI) of a wav file via R.

    Parameters
    ----------
    sound : str
        Path of the wav file. It is only read into the R variable ``sound``
        when no object of that name already exists in the R session.

    Returns
    -------
    tuple
        ``(results, language, command)``: a dict with ``aei_left`` and
        ``aei_right``, the language used (``'R'``) and the exact R
        expression that was evaluated.
    """
    # specify command with all arguments and language used for computation
    language = 'R'
    # The closing parenthesis belongs to the command itself. It used to be
    # patched into the R template below, so the returned command string
    # (which callers store in the database) was not a complete R call.
    command = "acoustic_evenness(sound, max_freq = 12000, db_threshold = -50, freq_step = 1000)"
    # load wav file in R environment
    r_command = """capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)""".format(sound)
    ro.r(r_command)
    # run calculation in R environment (same template as the other index functions)
    r_command = """capture.output(AEI <- {0}, file=NULL)""".format(command)
    ro.r(r_command)
    # load results into python environment
    AEI = com.load_data('AEI')

    results = {
        'aei_left': AEI['aei_left'][0],
        'aei_right': AEI['aei_right'][0]
    }
    return results, language, command



In [17]:

    
def calculateBAI(sound):
    """Compute the bioacoustic index (BAI) of a wav file via R.

    Parameters
    ----------
    sound : str
        Path of the wav file. It is only read into the R variable ``sound``
        when no object of that name already exists in the R session.

    Returns
    -------
    tuple
        ``(results, language, command)``: a dict with ``left_area`` and
        ``right_area``, the language used (``'R'``) and the exact R
        expression that was evaluated.
    """
    language = 'R'
    command = "bioacoustic_index(sound, min_freq = 2000, max_freq = 8000, fft_w = 512)"

    r_statements = (
        # read the recording into R unless a `sound` object is already there
        'capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)'.format(sound),
        # evaluate the index; capture.output keeps R's console output away
        'capture.output(BAI <- {0}, file=NULL)'.format(command),
    )
    for statement in r_statements:
        ro.r(statement)

    # pull the R result into Python
    bai_table = com.load_data('BAI')

    results = dict(
        left_area=bai_table['left_area'][0],
        right_area=bai_table['right_area'][0],
    )
    return results, language, command



In [18]:

    
def calculateNDSI(sound):
    """Compute the normalized difference soundscape index (NDSI) via R.

    Parameters
    ----------
    sound : str
        Path of the wav file. It is only read into the R variable ``sound``
        when no object of that name already exists in the R session.

    Returns
    -------
    tuple
        ``(results, language, command)``: a dict with the NDSI, biophony and
        anthrophony values for the left and right channels, the language
        used (``'R'``) and the exact R expression that was evaluated.
    """
    language = 'R'
    command = "ndsi(sound, fft_w = 1024, anthro_min = 1000, anthro_max = 2000, bio_min = 2000, bio_max = 12000)"

    # read the recording into R unless a `sound` object is already there
    load_template = 'capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)'
    ro.r(load_template.format(sound))

    # evaluate the index in R; capture.output keeps R's console output away
    ro.r("capture.output(NDSI <- {0}, file=NULL)".format(command))

    # pull the R result into Python
    ndsi_table = com.load_data('NDSI')

    results = {}
    for measure in ('ndsi', 'biophony', 'anthrophony'):
        for channel in ('left', 'right'):
            column = measure + '_' + channel
            results[column] = ndsi_table[column][0]
    return results, language, command



In [19]:

    
def calculateSSS(sound):
    """Compute the soundscape spectrum of a wav file via R's soundscapespec.

    Parameters
    ----------
    sound : str
        Path of the wav file. It is only read into the R variable ``sound``
        when no object of that name already exists in the R session.

    Returns
    -------
    tuple
        ``(results, language, command)``: a dict whose single key
        ``frequency_power`` holds the string form of the list of amplitude
        values, each formatted with four decimals; the language used
        (``'R'``); and the exact R expression that was evaluated.
    """
    language = 'R'
    command = 'soundscapespec(sound, wl = 1024, wn = "hamming", ovlp = 50, plot = FALSE)'

    # read the recording into R unless a `sound` object is already there
    load_template = 'capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)'
    ro.r(load_template.format(sound))

    # evaluate the spectrum in R; capture.output keeps R's console output away
    ro.r("capture.output(SSS <- {0}, file=NULL)".format(command))

    # pull the R result into Python
    spectrum = com.load_data('SSS')

    formatted_amplitudes = []
    for amplitude in spectrum['amplitude'].as_matrix():
        formatted_amplitudes.append('{0:.4f}'.format(amplitude))

    results = {'frequency_power': str(formatted_amplitudes)}
    return results, language, command

testing...



In [20]:

    
def log_process(string):
    """Append ``string`` as one line to ``$HOME/Desktop/process_log/process_log.txt``.

    The log directory, including a missing ``Desktop`` parent, is created on
    first use.

    Parameters
    ----------
    string : str
        Text to log; a newline is appended.

    Raises
    ------
    KeyError
        If the ``HOME`` environment variable is not set.
    """
    log_directory = os.path.join(os.environ['HOME'], "Desktop/process_log/")
    # makedirs creates missing parents too and, with exist_ok, is a no-op
    # when the directory is already there
    os.makedirs(log_directory, exist_ok=True)
    log_path = os.path.join(log_directory, "process_log.txt")
    # mode 'a' creates the file when absent, so no separate 'w' branch is
    # needed; the context manager closes the handle even if write() fails
    with open(log_path, 'a') as log_file:
        log_file.write(string + '\n')



In [21]:

    
#results = calculateBAI("/Users/Jake/Desktop/test/160224-160000.wav")



In [22]:

    
#sounds = {'129': '/Users/Jake/Desktop/test/160224-160000.wav'}

conversion functions



In [23]:

    
def convertFLACtoWAV(flacfile, wavfile):
    """Decode ``flacfile`` to ``wavfile`` with the ``flac`` command-line tool.

    The global ``working_directory`` is emptied first: it is deleted if it
    exists and then recreated.

    Raises ``subprocess.CalledProcessError`` when ``flac`` exits with a
    non-zero status.
    """
    # start from an empty working directory
    has_stale_directory = os.path.exists(working_directory)
    if has_stale_directory:
        rmtree(working_directory)
    os.mkdir(working_directory)

    decode_command = ["flac", "-d", flacfile, "-o", wavfile]
    subprocess.check_output(decode_command)

Process all sounds

check and calculate missing indices for all sounds in the database



In [24]:

    
# Indices to compute in this run.
# Supported values: 'ACI', 'ADI', 'AEI', 'BAI', 'NDSI', 'SSS'
indices = ['SSS']

# Explicit dispatch table (index name -> calculation function); replaces
# eval() on a formatted string.
index_calculators = {
    'ACI': calculateACI,
    'ADI': calculateADI,
    'AEI': calculateAEI,
    'BAI': calculateBAI,
    'NDSI': calculateNDSI,
    'SSS': calculateSSS,
}

# The bar is advanced once per (sound, index) pair, so size it accordingly.
progress_bar = pyprind.ProgBar(len(sounds) * len(indices), bar_char='█', title='Process progress', monitor=True, stream=1, width=50)

for sound in sounds:

    # compute all indices for the current sound
    for index in indices:
        # update progress bar
        progress_bar.update(item_id = sound+" -> "+index)

        tablename = 'Index' + index
        sound_filter = "Sound = '{0}'".format(sound)
        row = pumilio_db.fetch_as_pandas_df(table=tablename, where=sound_filter)

        # Only sounds without a row in the index table are processed. An
        # existing row (or several) is left untouched.
        if len(row) != 0:
            continue

        # NOTE(review): the row is inserted before the index is computed, so
        # a failure below leaves a row that later runs treat as done.
        insert_row(table=tablename, columns='Sound', values=sound)

        # create a wav file for analysis if one does not already exist in the working directory
        # splitext removes only the extension; str.strip('.flac') would strip
        # any of the characters '.', 'f', 'l', 'a', 'c' from both ends of the name
        filename = os.path.splitext(os.path.basename(sounds[sound]))[0]
        wavfile = os.path.join(working_directory, filename) + '.wav'
        if not os.path.exists(wavfile):
            convertFLACtoWAV(sounds[sound], wavfile)

        # compute index
        results, language, command = index_calculators[index](wavfile)

        # update each column in database for each part of the index returned
        for item in results:
            update_row(table=tablename, values=(item, results[item]), where=sound_filter)

        # check if command already exists in the 'Analyses' table
        command_filter = "command = '{0}'".format(command)
        command_row = pumilio_db.fetch_as_pandas_df(table='Analyses', where=command_filter)
        if len(command_row) == 0:
            # add the command to the 'Analyses' table if it does not exist
            insert_row(table='Analyses', columns='command', values=command)
            # get id of new row
            command_row = pumilio_db.fetch_as_pandas_df(table='Analyses', where=command_filter)
            command_ID = command_row['ID'][0]
            # update all other columns
            analysis_filter = "ID = '{0}'".format(command_ID)
            update_row(table='Analyses', values=('name', index), where=analysis_filter)
            update_row(table='Analyses', values=('language', language), where=analysis_filter)
        elif len(command_row) == 1:
            # if the command exists, get the command_id
            command_ID = command_row['ID'][0]
        else:
            # Previously a silent `pass`, which left command_ID undefined or
            # holding the value from an earlier iteration.
            raise RuntimeError(
                "{0} rows in 'Analyses' match command {1!r}; expected at most one".format(
                    len(command_row), command))

        # update the command_id for the index calculation
        update_row(table=tablename, values=('command_ID', command_ID), where=sound_filter)

    # cleanup R environment so the next recording is read from disk; guarded
    # because no wav is loaded when every index already had a row
    ro.r("if (exists('sound')) remove('sound')")

    # cleanup working directory
    if os.path.exists(working_directory):
        rmtree(working_directory)

print('\n')
# NOTE(review): this update comes after the bar has already received all of
# its per-item updates; kept from the original - confirm it is still wanted.
progress_bar.update()
print(progress_bar)









    



Process progress
0%                                              100%
[██████████████████████████████████████████████████] | ETA: 00:00:00 | Item ID: 153 -> SSS
Total time elapsed: 00:16:53



Total time elapsed: 00:16:59
Title: Process progress
  Started: 06/03/2016 11:40:52
  Finished: 06/03/2016 11:57:51
  Total time elapsed: 00:16:59
  CPU %: 89.60
  Memory %: 15.09